library(data.table)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.5     ✔ purrr   0.3.4
## ✔ tibble  3.1.6     ✔ dplyr   1.0.7
## ✔ tidyr   1.1.4     ✔ stringr 1.4.0
## ✔ readr   2.1.1     ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::between()   masks data.table::between()
## ✖ dplyr::filter()    masks stats::filter()
## ✖ dplyr::first()     masks data.table::first()
## ✖ dplyr::lag()       masks stats::lag()
## ✖ dplyr::last()      masks data.table::last()
## ✖ purrr::transpose() masks data.table::transpose()
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(pracma)
## 
## Attaching package: 'pracma'
## The following object is masked from 'package:purrr':
## 
##     cross
df <- read.csv("wolfberry.csv", row.names=NULL)

df[df$Y == 0, 1] = "LBPs"
df[df$Y == 2, 1] = "Dextran"
df[df$Y == 3, 1] = "Maltodextrin"
df[df$Y == 4, 1] = "Starch"

There are two general methods to perform PCA in R :

The function princomp() uses the spectral decomposition approach. The functions prcomp() and PCA()[FactoMineR] use the singular value decomposition (SVD).

inclusion1 <- paste0("X", c(1:779))

inclusion2 <- paste0("X", c(1038:1870))

inclusion3 <- paste0("X", c(1142:1870))

inclusion4 <- paste0("X", c(1298:1870))
df_sub <- df[, c("C", "Y", inclusion3)]

df.active <- df_sub %>% filter(Y != 1)
df.active <- df.active %>% filter(C == 0 | C == 0.1) 

df.active_x <- df.active %>% select(-c(Y, C))
dim(df.active_x)
## [1] 200 729

PCA (gradients)

Source

We apply the SVD-based PCA to minimize the individual variance.

value_prime <- pracma::gradient(as.numeric(df.active_x[1, ]), h1 = 1)
plot(value_prime, type = "l")

plot(as.numeric(df.active_x[1, ]), type = "l")

derivative <- data.frame()

for (i in c(1:nrow(df.active_x))) {
  value_prime <- pracma::gradient(as.numeric(df.active_x[i, ]), h1 = 1)
  value_prime_prime <- pracma::gradient(value_prime, h1 = 1)
  #value_prime <- c(value_prime, value_prime_prime)
  derivative <- rbind(derivative, value_prime)
}
res.pca <- prcomp(derivative, scale = FALSE)
fviz_eig(res.pca)

fviz_pca_ind(res.pca,
             axes=c(1,2),
             label="none",
             habillage=df.active$Y,
             addEllipses=TRUE, 
             ellipse.level=0.95
             )

3D

https://plotly.com/r/pca-visualization/

library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(stats)
prin_comp <- res.pca
explained_variance_ratio <- summary(prin_comp)[["importance"]]['Proportion of Variance',]
explained_variance_ratio <- 100 * explained_variance_ratio
components <- prin_comp[["x"]]
components <- data.frame(components)
components <- cbind(components, df.active$Y)
components$PC3 <- -components$PC3
components$PC2 <- -components$PC2

axis = list(showline=FALSE,
            zeroline=FALSE,
            gridcolor='#ffff',
            ticklen=4,
            titlefont=list(size=13))

fig <- components %>%
  plot_ly()  %>%
  add_trace(
    type = 'splom',
    dimensions = list(
      list(label=paste('PC 1 (',toString(round(explained_variance_ratio[1],1)),'%)',sep = ''), values=~PC1),
      list(label=paste('PC 2 (',toString(round(explained_variance_ratio[2],1)),'%)',sep = ''), values=~PC2),
      list(label=paste('PC 3 (',toString(round(explained_variance_ratio[3],1)),'%)',sep = ''), values=~PC3),
      list(label=paste('PC 4 (',toString(round(explained_variance_ratio[4],1)),'%)',sep = ''), values=~PC4)
    ),
    color = ~factor(`df.active$Y`), colors = c('#636EFA','#EF553B','#00CC96', '#F2C43D')
  ) %>%
  style(diagonal = list(visible = FALSE)) %>%
  layout(
    legend=list(title=list(text='color')),
    hovermode='closest',
    dragmode= 'select',
    plot_bgcolor='rgba(240,240,240, 0.95)',
    xaxis=list(domain=NULL, showline=F, zeroline=F, gridcolor='#ffff', ticklen=4),
    yaxis=list(domain=NULL, showline=F, zeroline=F, gridcolor='#ffff', ticklen=4),
    xaxis2=axis,
    xaxis3=axis,
    xaxis4=axis,
    yaxis2=axis,
    yaxis3=axis,
    yaxis4=axis
  )

fig
prin_comp <- res.pca

components <- prin_comp[["x"]]
components <- data.frame(components)

components$PC3 <- -components$PC3
components$PC2 <- -components$PC2
components <- cbind(components, df.active$Y)

tot_explained_variance_ratio <- summary(prin_comp)[["importance"]]['Proportion of Variance',]
tot_explained_variance_ratio <- 100 * sum(tot_explained_variance_ratio)

#tit = 'Total Explained Variance = 99.48'

fig <- plot_ly(components, x = ~PC1, y = ~PC2, z = ~PC3, color = ~factor(`df.active$Y`), colors = c('#636EFA','#EF553B','#00CC96', '#F2C43D')) %>%
  add_markers(size = 30)


fig <- fig %>%
  layout(
    scene = list(bgcolor = "#e5ecf6")
)

fig